from lxml import etree


# Declare the HTML snippet first, then parse it.
# NOTE: the last <li> ("fifth item") has no closing </li> tag, so this markup
# is not well-formed as written. Presumably that is intentional, to show the
# HTML parser repairing the tree -- TODO confirm.
# NOTE(review): in the part of the file reviewed here, `text` is referenced
# only from commented-out code (etree.HTML(text)).
text = '''
<div>
    <ul>
        <li class="item-0"><a href="link1.html">first item</a></li>
        <li class="item-1"><a href="link2.html">second item</a></li>
        <li class="item-inactive"><a href="link3.html">third item</a></li>
        <li class="item-1"><a href="link4.html">fourth item</a></li>
        <li class="item-0"><a href="link5.html">fifth item</a>
    </ul>
</div>
'''

# html = etree.HTML(text)
# result = etree.tostring(html)
# print(result.decode('utf-8'))

# Read an HTML file from disk and parse it
# html = etree.parse('./test.html', etree.HTMLParser())
# result = etree.tostring(html)
# print(result.decode('utf-8'))
# result = html.xpath('//*')
# result = html.xpath('//ul//a')
# print(result)
# print(result[0])


# Find a parent node: select the <a> element whose href is "link4.html",
# step up to its parent with "..", and read that parent's class attribute.
# NOTE(review): presumably ./test.html holds the same markup as `text`, in
# which case this would print ['item-1'] -- confirm against the file.
html_parser = etree.HTMLParser()
html = etree.parse('./test.html', parser=html_parser)
parent_class_query = '//a[@href="link4.html"]/../@class'
result = html.xpath(parent_class_query)
print(result)